import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import librosa as librosa
import librosa.display
import os
# Load the per-note metadata table from the working directory and preview it.
df=pd.read_csv('note_info.csv')
df.head()
| Unnamed: 0 | note_str | sample_rate | qualities_str | instrument_source | instrument_family_str | instrument_family | note | instrument_source_str | qualities | pitch | instrument_str | instrument | velocity | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | keyboard_acoustic_004-060-025 | keyboard_acoustic_004-060-025 | 16000 | ['dark', 'reverb'] | 0 | keyboard | 4 | 278915 | acoustic | [0, 1, 0, 0, 0, 0, 0, 0, 1, 0] | 60 | keyboard_acoustic_004 | 327 | 25 |
| 1 | bass_synthetic_033-050-100 | bass_synthetic_033-050-100 | 16000 | ['dark'] | 2 | bass | 0 | 270361 | synthetic | [0, 1, 0, 0, 0, 0, 0, 0, 0, 0] | 50 | bass_synthetic_033 | 417 | 100 |
| 2 | bass_synthetic_009-052-050 | bass_synthetic_009-052-050 | 16000 | ['bright', 'distortion', 'long_release'] | 2 | bass | 0 | 270001 | synthetic | [1, 0, 1, 0, 1, 0, 0, 0, 0, 0] | 52 | bass_synthetic_009 | 150 | 50 |
| 3 | keyboard_electronic_003-064-127 | keyboard_electronic_003-064-127 | 16000 | [] | 1 | keyboard | 4 | 50978 | electronic | [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] | 64 | keyboard_electronic_003 | 65 | 127 |
| 4 | bass_synthetic_034-030-050 | bass_synthetic_034-030-050 | 16000 | ['distortion', 'tempo-synced'] | 2 | bass | 0 | 265159 | synthetic | [0, 0, 1, 0, 0, 0, 0, 0, 0, 1] | 30 | bass_synthetic_034 | 420 | 50 |
# Print column dtypes, non-null counts and memory usage of the metadata table.
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 12678 entries, 0 to 12677 Data columns (total 14 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Unnamed: 0 12678 non-null object 1 note_str 12678 non-null object 2 sample_rate 12678 non-null int64 3 qualities_str 12678 non-null object 4 instrument_source 12678 non-null int64 5 instrument_family_str 12678 non-null object 6 instrument_family 12678 non-null int64 7 note 12678 non-null int64 8 instrument_source_str 12678 non-null object 9 qualities 12678 non-null object 10 pitch 12678 non-null int64 11 instrument_str 12678 non-null object 12 instrument 12678 non-null int64 13 velocity 12678 non-null int64 dtypes: int64(7), object(7) memory usage: 1.4+ MB
# Number of distinct pitch values present in the metadata.
len(df['pitch'].unique())
112
def extract_feature_means(audio_file_path: str) -> pd.DataFrame:
    """Summarise one audio file as a single-row DataFrame of mean features.

    The clip is loaded with ``librosa.load`` (library-default sample rate),
    trimmed with ``librosa.effects.trim``, and a set of spectral / temporal
    descriptors is computed.  Each frame-wise descriptor is collapsed to its
    mean.  MFCC statistics are obtained from ``extract_mfcc_feature_means``
    and joined to the other features on ``file_name``.

    Args:
        audio_file_path: Path of the audio file to analyse.  It is also
            stored verbatim in the ``file_name`` column.

    Returns:
        A one-row DataFrame: ``file_name``, the spectral/temporal means, and
        the MFCC, MFCC-delta and MFCC-acceleration means.
    """
    number_of_mfcc = 20
    n_fft = 512
    hop_length = 256

    y, sr = librosa.load(audio_file_path)
    signal, _ = librosa.effects.trim(y)

    # Magnitude STFT expressed in dB relative to its own peak.
    d_audio = np.abs(librosa.stft(signal, n_fft=n_fft, hop_length=hop_length))
    db_audio = librosa.amplitude_to_db(d_audio, ref=np.max)

    # Audio is passed by keyword (y=...) throughout: recent librosa releases
    # make the feature-function arguments keyword-only.
    s_audio = librosa.feature.melspectrogram(y=signal, sr=sr)
    # NOTE(review): melspectrogram returns a power spectrogram by default, for
    # which librosa.power_to_db is the matching conversion.  amplitude_to_db
    # is kept so the scale of the existing "mel_spectrogram" feature does not
    # change -- confirm which is intended.
    s_db_audio = librosa.amplitude_to_db(s_audio, ref=np.max)

    y_harm, y_perc = librosa.effects.hpss(signal)

    spectral_centroids = librosa.feature.spectral_centroid(y=signal, sr=sr)[0]
    spectral_centroids_delta = librosa.feature.delta(
        spectral_centroids, mode='nearest')
    spectral_centroids_accelerate = librosa.feature.delta(
        spectral_centroids, order=2, mode='nearest')

    chromagram = librosa.feature.chroma_stft(
        y=signal, sr=sr, hop_length=hop_length)
    tempo_y, _ = librosa.beat.beat_track(y=signal, sr=sr)
    spectral_rolloff = librosa.feature.spectral_rolloff(y=signal, sr=sr)[0]
    onset_env = librosa.onset.onset_strength(y=signal, sr=sr)
    spectral_bandwidth_2 = librosa.feature.spectral_bandwidth(
        y=signal, sr=sr)[0]
    spectral_bandwidth_3 = librosa.feature.spectral_bandwidth(
        y=signal, sr=sr, p=3)[0]
    spectral_bandwidth_4 = librosa.feature.spectral_bandwidth(
        y=signal, sr=sr, p=4)[0]

    # Insertion order defines the column order of the resulting DataFrame.
    audio_features = {
        "file_name": audio_file_path,
        "zero_crossing_rate": np.mean(
            librosa.feature.zero_crossing_rate(signal)[0]),
        "zero_crossings": np.sum(librosa.zero_crossings(signal, pad=False)),
        # Only row 0 of each dB spectrogram is averaged.
        "spectrogram": np.mean(db_audio[0]),
        "mel_spectrogram": np.mean(s_db_audio[0]),
        "harmonics": np.mean(y_harm),
        "perceptual_shock_wave": np.mean(y_perc),
        "spectral_centroids": np.mean(spectral_centroids),
        "spectral_centroids_delta": np.mean(spectral_centroids_delta),
        "spectral_centroids_accelerate": np.mean(
            spectral_centroids_accelerate),
    }
    # chroma1 .. chroma12 <- rows 0 .. 11 of the chromagram.
    for chroma_row in range(12):
        audio_features["chroma" + str(chroma_row + 1)] = np.mean(
            chromagram[chroma_row])
    audio_features.update({
        # beat_track may return the tempo as a scalar or a 1-element array
        # depending on the librosa release; normalise to a plain float.
        "tempo_bpm": float(np.atleast_1d(tempo_y)[0]),
        "spectral_rolloff": np.mean(spectral_rolloff),
        "spectral_flux": np.mean(onset_env),
        "spectral_bandwidth_2": np.mean(spectral_bandwidth_2),
        "spectral_bandwidth_3": np.mean(spectral_bandwidth_3),
        "spectral_bandwidth_4": np.mean(spectral_bandwidth_4),
    })

    mfcc_df = extract_mfcc_feature_means(audio_file_path,
                                         signal,
                                         sample_rate=sr,
                                         number_of_mfcc=number_of_mfcc)
    features_df = pd.DataFrame.from_records(data=[audio_features])
    return pd.merge(features_df, mfcc_df, on='file_name')
def extract_mfcc_feature_means(audio_file_name: str,
                               signal: np.ndarray,
                               sample_rate: int,
                               number_of_mfcc: int) -> pd.DataFrame:
    """Return a one-row DataFrame of per-coefficient MFCC means.

    For every coefficient index ``i`` in ``range(number_of_mfcc)`` three
    columns are produced, in this order: ``mfcc<i>`` (mean of the
    coefficient), ``mfcc_delta_<i>`` (mean of its first-order delta) and
    ``mfcc_accelerate_<i>`` (mean of its second-order delta).

    Args:
        audio_file_name: Value stored in the ``file_name`` column.
        signal: Audio samples handed to ``librosa.feature.mfcc``.
        sample_rate: Sample rate of ``signal``.
        number_of_mfcc: Number of MFCC coefficients to compute.

    Returns:
        DataFrame with a single row: ``file_name`` followed by the
        ``3 * number_of_mfcc`` mean columns.
    """
    coefficients = librosa.feature.mfcc(
        y=signal, sr=sample_rate, n_mfcc=number_of_mfcc)
    first_order = librosa.feature.delta(coefficients)
    second_order = librosa.feature.delta(coefficients, order=2)

    # Keys are inserted coefficient by coefficient so the three statistics of
    # each index stay adjacent in the resulting column order.
    row = {"file_name": audio_file_name}
    for index in range(number_of_mfcc):
        suffix = str(index)
        row["mfcc" + suffix] = np.mean(coefficients[index])
        row["mfcc_delta_" + suffix] = np.mean(first_order[index])
        row["mfcc_accelerate_" + suffix] = np.mean(second_order[index])

    return pd.DataFrame.from_records(data=[row])
# Directory holding the audio clips to analyse.
path = r"C:\Users\ksivi\Desktop\New folder\nsynth-valid\audio"
# os.listdir returns entries in arbitrary order; sort them so that the
# positional lookups used later (e.g. dir_list[455]) are reproducible.
dir_list = sorted(os.listdir(path))
len(dir_list)
12676
# Extract one feature row per audio file in the directory listing.
# Files that cannot be processed are reported and skipped instead of being
# dropped silently; only ordinary errors are caught, so Ctrl-C still stops
# the (long-running) loop.
info = []
skipped_files = []
for audio_name in dir_list:
    try:
        data = extract_feature_means(os.path.join(path, audio_name))
    except Exception as exc:
        skipped_files.append(audio_name)
        print("Skipping {}: {!r}".format(audio_name, exc))
        continue
    info.append(data.values[0])

# A single extra extraction, used only to obtain the feature column names.
# The path is relative, so this file must exist in the working directory.
df_2 = extract_feature_means('bass_electronic_018-022-025.wav')
df_2.head()
| file_name | zero_crossing_rate | zero_crossings | spectrogram | mel_spectrogram | harmonics | perceptual_shock_wave | spectral_centroids | spectral_centroids_delta | spectral_centroids_accelerate | ... | mfcc_accelerate_16 | mfcc17 | mfcc_delta_17 | mfcc_accelerate_17 | mfcc18 | mfcc_delta_18 | mfcc_accelerate_18 | mfcc19 | mfcc_delta_19 | mfcc_accelerate_19 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | bass_electronic_018-022-025.wav | 0.210125 | 15742 | -66.413757 | -77.642357 | 0.000006 | -0.000673 | 2671.302747 | 15.874826 | -0.008433 | ... | 0.046337 | -0.739872 | -0.046959 | 0.051725 | -0.290447 | -0.08219 | 0.018868 | 2.694299 | -0.056343 | -0.000569 |
1 rows × 88 columns
# Turn the collected feature rows into a DataFrame with the same columns as
# a single-file extraction.
columns = df_2.columns
va = pd.DataFrame(data=info, columns=columns)

# Build the join key from the stored file path: bare file name without its
# extension.  (The feature frame has no 'name' column until it is created
# here, so it must be derived from 'file_name'.)
va['name'] = va['file_name'].map(
    lambda file_path: os.path.splitext(os.path.basename(file_path))[0])

df['note_str'] = df['note_str'].astype(str)
# Right join: keep every extracted feature row, attach metadata where the
# note name matches.
samples = df.merge(va, left_on='note_str', right_on='name', how='right')
samples['instrument_family_str'].value_counts()
bass 2634 keyboard 2403 guitar 2070 organ 1598 brass 886 string 814 reed 720 mallet 663 flute 470 vocal 404 Name: instrument_family_str, dtype: int64
# Row count after joining the metadata onto the extracted features.
len(samples)
12662
# Waveform of the clip at position 8512 of the directory listing, limited to
# the first two seconds and saved as 'mallet_wave.png'.
wave_file = "C:/Users/ksivi/Desktop/New folder/nsynth-valid/audio/" + dir_list[8512]
x, sr = librosa.load(wave_file)

plt.figure(figsize=(20, 4))
librosa.display.waveshow(y=x, sr=sr)
plt.title("Mallet Audio Wave")
plt.xlim(-0.1, 2)
plt.savefig('mallet_wave.png')
# Two stacked spectrograms of one clip: linear frequency axis on top,
# log frequency axis (hop length 1024) below; saved as 'bass_spec.png'.
y, sr = librosa.load(
    "C:/Users/ksivi/Desktop/New folder/nsynth-valid/audio/" + dir_list[455])
fig, ax = plt.subplots(nrows=2, ncols=1, sharex=True)
top_axis, bottom_axis = ax[0], ax[1]

# Top panel: default STFT, dB relative to the peak magnitude.
magnitude = np.abs(librosa.stft(y))
D = librosa.amplitude_to_db(magnitude, ref=np.max)
img = librosa.display.specshow(
    D, y_axis='linear', x_axis='time', sr=sr, ax=top_axis)
top_axis.set(title=dir_list[455])
top_axis.label_outer()

# Bottom panel: same clip with a coarser hop, drawn on a log axis.
hop_length = 1024
magnitude = np.abs(librosa.stft(y, hop_length=hop_length))
D = librosa.amplitude_to_db(magnitude, ref=np.max)
librosa.display.specshow(
    D, y_axis='log', sr=sr, hop_length=hop_length, x_axis='time',
    ax=bottom_axis)
bottom_axis.set(title='Log-frequency power spectrogram')
bottom_axis.label_outer()

fig.colorbar(img, ax=ax, format="%+2.f dB")
for axis in (top_axis, bottom_axis):
    axis.set_xlim(0, 2)
plt.savefig('bass_spec.png')
# Side-by-side spectrograms of two clips (one per column): linear frequency
# axis in the top row, log frequency axis (hop length 1024) in the bottom
# row; saved as 'guitar_bass_spec.png'.
y1, sr1 = librosa.load("C:/Users/ksivi/Desktop/New folder/nsynth-valid/audio/"+dir_list[4002])
y2, sr2 = librosa.load("C:/Users/ksivi/Desktop/New folder/nsynth-valid/audio/"+dir_list[455])
fig, axs = plt.subplots(nrows=2, ncols=2, sharex=False, figsize=(20, 8))
fig.tight_layout(pad=5)

D1 = librosa.amplitude_to_db(np.abs(librosa.stft(y1)), ref=np.max)
img1 = librosa.display.specshow(D1, y_axis='linear', x_axis='time', sr=sr1, ax=axs[0, 0])
D2 = librosa.amplitude_to_db(np.abs(librosa.stft(y2)), ref=np.max)
img2 = librosa.display.specshow(D2, y_axis='linear', x_axis='time', sr=sr2, ax=axs[0, 1])
axs[0, 0].set_title(dir_list[4002], fontsize=20)
axs[0, 1].set_title(dir_list[455], fontsize=20)

hop_length = 1024
D1 = librosa.amplitude_to_db(np.abs(librosa.stft(y1, hop_length=hop_length)), ref=np.max)
librosa.display.specshow(D1, y_axis='log', sr=sr1, hop_length=hop_length, x_axis='time', ax=axs[1, 0])
D2 = librosa.amplitude_to_db(np.abs(librosa.stft(y2, hop_length=hop_length)), ref=np.max)
librosa.display.specshow(D2, y_axis='log', sr=sr2, hop_length=hop_length, x_axis='time', ax=axs[1, 1])
axs[1, 0].set(title='Log-frequency power spectrogram')
axs[1, 1].set(title='Log-frequency power spectrogram')

# The colorbar must be built from an image drawn on THIS figure (img1), not
# from a mappable left over from an earlier plot.
fig.colorbar(img1, ax=axs, format="%+2.f dB")
plt.setp(axs, xlim=(0, 2));
plt.savefig('guitar_bass_spec.png')
# Side-by-side spectrograms of two clips (one per column): linear frequency
# axis in the top row, log frequency axis (hop length 1024) in the bottom
# row; saved as 'mallet_keyboard_spec.png'.
y3, sr3 = librosa.load("C:/Users/ksivi/Desktop/New folder/nsynth-valid/audio/"+dir_list[8512])
y4, sr4 = librosa.load("C:/Users/ksivi/Desktop/New folder/nsynth-valid/audio/"+dir_list[7481])
fig, axs = plt.subplots(nrows=2, ncols=2, sharex=False, figsize=(20, 8))
fig.tight_layout(pad=5)

D1 = librosa.amplitude_to_db(np.abs(librosa.stft(y3)), ref=np.max)
img1 = librosa.display.specshow(D1, y_axis='linear', x_axis='time', sr=sr3, ax=axs[0, 0])
D2 = librosa.amplitude_to_db(np.abs(librosa.stft(y4)), ref=np.max)
img2 = librosa.display.specshow(D2, y_axis='linear', x_axis='time', sr=sr4, ax=axs[0, 1])
axs[0, 0].set_title(dir_list[8512], fontsize=20)
axs[0, 1].set_title(dir_list[7481], fontsize=20)

hop_length = 1024
D3 = librosa.amplitude_to_db(np.abs(librosa.stft(y3, hop_length=hop_length)), ref=np.max)
librosa.display.specshow(D3, y_axis='log', sr=sr3, hop_length=hop_length, x_axis='time', ax=axs[1, 0])
D4 = librosa.amplitude_to_db(np.abs(librosa.stft(y4, hop_length=hop_length)), ref=np.max)
librosa.display.specshow(D4, y_axis='log', sr=sr4, hop_length=hop_length, x_axis='time', ax=axs[1, 1])
axs[1, 0].set(title='Log-frequency power spectrogram')
axs[1, 1].set(title='Log-frequency power spectrogram')

# The colorbar must be built from an image drawn on THIS figure (img1), not
# from a mappable left over from an earlier plot.
fig.colorbar(img1, ax=axs, format="%+2.f dB")
plt.setp(axs, xlim=(0, 2));
plt.savefig('mallet_keyboard_spec.png')
# Scatter of mean zero-crossing rate against mean spectral centroid, coloured
# by pitch; saved as 'cent_zero_cross_scatter.png'.
plt.figure(figsize=(16, 16), dpi=200)
sns.scatterplot(data=samples, x='spectral_centroids', y='zero_crossing_rate',
                hue='pitch', palette='viridis')
plt.title('Zero Crossing Rate vs Spectral Centroids')  # fixed typo "Spetral"
plt.xlabel('Spectral Centroids')
plt.ylabel('Zero Crossing Rate')
plt.savefig('cent_zero_cross_scatter.png')
# One bar plot per feature, grouped by instrument family.  Each entry is
# (feature column, y-axis label, title, output file).
bar_plot_specs = [
    ('mfcc3', 'MFCC 3', 'MFCC 3 by Instrument', 'mfcc3_inst.png'),
    ('mfcc8', 'MFCC 8', 'MFCC 8 by Instrument', 'mfcc8_inst.png'),
    ('spectral_bandwidth_2', 'spectral_bandwidth',
     'Spectral Bandwidth by Instrument', 'spec_band.png'),
]
for feature, y_label, plot_title, out_file in bar_plot_specs:
    plt.figure(figsize=(10, 6), dpi=200)
    sns.barplot(data=samples, x='instrument_family_str', y=feature)
    plt.xlabel('Instrument')
    plt.ylabel(y_label)
    plt.title(plot_title)
    plt.savefig(out_file)
# Pairwise scatter matrix of a few MFCC means plus one spectral bandwidth,
# coloured by pitch; saved as 'pairplot.png'.
pair_grid = sns.pairplot(
    data=samples,
    vars=['mfcc2', 'mfcc3', 'mfcc4', 'mfcc5', 'mfcc6', 'mfcc7',
          'spectral_bandwidth_3'],
    hue='pitch',
    palette='viridis',
)
# plt.title would only label the last subplot of the grid; a figure-level
# suptitle labels the whole pair plot.  y > 1 keeps it clear of the top row.
pair_grid.fig.suptitle('Pair Plot of Various MFCCs by Pitch', y=1.02)
pair_grid.savefig('pairplot.png')
# Preview the merged metadata + feature table.
samples.head()
| Unnamed: 0 | note_str | sample_rate | qualities_str | instrument_source | instrument_family_str | instrument_family | note | instrument_source_str | qualities | ... | mfcc17 | mfcc_delta_17 | mfcc_accelerate_17 | mfcc18 | mfcc_delta_18 | mfcc_accelerate_18 | mfcc19 | mfcc_delta_19 | mfcc_accelerate_19 | name | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | bass_electronic_018-022-050 | 16000 | ['percussive'] | 1 | bass | 0 | 277009 | electronic | [0, 0, 0, 0, 0, 0, 0, 1, 0, 0] | ... | -0.739872 | -0.046959 | 0.051725 | -0.290447 | -0.082190 | 0.018868 | 2.694299 | -0.056343 | -0.000569 | bass_electronic_018-022-050 |
| 1 | 1 | bass_electronic_018-022-127 | 16000 | ['fast_decay', 'percussive'] | 1 | bass | 0 | 223304 | electronic | [0, 0, 0, 1, 0, 0, 0, 1, 0, 0] | ... | -0.969568 | 0.028864 | -0.215973 | -2.457192 | -0.140675 | 0.338459 | -0.499463 | -0.884398 | 0.581262 | bass_electronic_018-022-127 |
| 2 | 2 | bass_electronic_018-023-050 | 16000 | [] | 1 | bass | 0 | 222626 | electronic | [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] | ... | 1.079908 | -0.011627 | 0.000507 | 0.035780 | 0.013179 | 0.005179 | -0.784336 | 0.031024 | -0.011306 | bass_electronic_018-023-050 |
| 3 | 3 | bass_electronic_018-023-100 | 16000 | [] | 1 | bass | 0 | 230338 | electronic | [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] | ... | 1.711984 | 0.009815 | 0.006658 | 0.741615 | -0.012899 | 0.003196 | -0.122626 | 0.010039 | -0.027255 | bass_electronic_018-023-100 |
| 4 | 4 | bass_electronic_018-024-050 | 16000 | [] | 1 | bass | 0 | 284868 | electronic | [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] | ... | 1.546056 | 0.016031 | -0.002961 | 0.683596 | -0.000470 | 0.006038 | -0.126844 | -0.017591 | -0.018448 | bass_electronic_018-024-050 |
5 rows × 103 columns
# Keep only the two targets ('pitch', 'instrument_family_str') and the
# numeric audio features; every other metadata / bookkeeping column goes.
unused_columns = [
    'note_str', 'instrument_source', 'instrument_family', 'sample_rate',
    'qualities_str', 'name', 'instrument_source_str', 'qualities',
    'Unnamed: 0', 'instrument_str', 'instrument', 'velocity', 'file_name',
    'note',
]
samples = samples.drop(columns=unused_columns)

# Restrict to rows whose pitch is strictly between 21 and 108.
pitch_in_range = (samples['pitch'] > 21) & (samples['pitch'] < 108)
note = samples[pitch_in_range]

# 'inst' keeps the instrument label (pitch removed); 'note' keeps the pitch
# label (instrument family removed).
inst = note.drop(columns='pitch')
note = note.drop(columns='instrument_family_str')
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
# List the distinct instrument-family labels in the merged table.
samples['instrument_family_str'].unique()
array(['bass', 'brass', 'flute', 'guitar', 'keyboard', 'mallet', 'organ',
'reed', 'string', 'vocal'], dtype=object)
Instrument Identification
# Instrument-family classifier: features are every column of `inst` except
# the label itself.
y = inst['instrument_family_str']
X = inst.drop(columns='instrument_family_str')

X_train_inst, X_test_inst, y_train_inst, y_test_inst = train_test_split(
    X, y, test_size=0.15, random_state=101)

# The scaler is fitted on the training split only and then applied to both.
scaler = StandardScaler()
scaled_X_train_inst = scaler.fit_transform(X_train_inst)
scaled_X_test_inst = scaler.transform(X_test_inst)

# Grid over regularisation strength and penalty type for a one-vs-rest
# logistic regression.
C = np.logspace(0, 4, 10)
penalty = ['l1', 'l2']
log_model = LogisticRegression(solver='saga', multi_class='ovr',
                               max_iter=10000)
grid_model = GridSearchCV(log_model,
                          param_grid={'C': C, 'penalty': penalty})
grid_model.fit(scaled_X_train_inst, y_train_inst)
GridSearchCV(estimator=LogisticRegression(max_iter=10000, multi_class='ovr',
solver='saga'),
param_grid={'C': array([1.00000000e+00, 2.78255940e+00, 7.74263683e+00, 2.15443469e+01,
5.99484250e+01, 1.66810054e+02, 4.64158883e+02, 1.29154967e+03,
3.59381366e+03, 1.00000000e+04]),
'penalty': ['l1', 'l2']})
# Evaluate the tuned logistic regression on the held-out instrument split.
y_preds = grid_model.predict(scaled_X_test_inst)

from sklearn.metrics import classification_report
try:
    from sklearn.metrics import plot_confusion_matrix
except ImportError:
    # plot_confusion_matrix no longer exists in recent scikit-learn releases;
    # ConfusionMatrixDisplay.from_estimator takes the same leading arguments
    # (estimator, X, y) and the same `ax` keyword, so it is bound under the
    # old name for the cells that still call it.
    from sklearn.metrics import ConfusionMatrixDisplay
    plot_confusion_matrix = ConfusionMatrixDisplay.from_estimator

# classification_report expects (y_true, y_pred); with the arguments swapped
# precision and recall are exchanged and "support" counts predictions.
print(classification_report(y_test_inst, y_preds))
precision recall f1-score support
bass 0.83 0.77 0.80 422
brass 0.93 0.84 0.88 140
flute 0.72 0.72 0.72 80
guitar 0.71 0.72 0.71 315
keyboard 0.74 0.78 0.76 334
mallet 0.63 0.83 0.72 81
organ 0.95 0.91 0.93 274
reed 0.94 1.00 0.97 94
string 0.86 0.85 0.86 113
vocal 0.94 0.96 0.95 47
accuracy 0.81 1900
macro avg 0.83 0.84 0.83 1900
weighted avg 0.82 0.81 0.81 1900
# Confusion matrix of the tuned logistic regression on the test split.
# The axes are created first and handed to the plotting helper: otherwise the
# helper opens its own figure and the 14x14 one is left empty.
cm_fig, cm_ax = plt.subplots(figsize=(14, 14))
plot_confusion_matrix(grid_model, scaled_X_test_inst, y_test_inst, ax=cm_ax)
plt.xticks(rotation=90);
<Figure size 1008x1008 with 0 Axes>
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
# Test-split error of a k-nearest-neighbours classifier for k = 1 .. 99,
# plotted against k.
# NOTE(review): the error is measured on the test split, so picking k from
# this curve uses test data for model selection.
k_values = range(1, 100)
test_error_rates = []
for k in k_values:
    knn_model = KNeighborsClassifier(n_neighbors=k)
    knn_model.fit(scaled_X_train_inst, y_train_inst)

    y_pred_test = knn_model.predict(scaled_X_test_inst)
    test_error = 1 - accuracy_score(y_test_inst, y_pred_test)
    test_error_rates.append(test_error)

plt.figure(figsize=(10, 6), dpi=200)
plt.plot(k_values, test_error_rates, label='Test Error')
plt.legend()
plt.ylabel('Error Rate')
plt.xlabel("K Value")
Text(0.5, 0, 'K Value')
# Fit a 1-nearest-neighbour classifier on the scaled instrument features and
# report its performance on the held-out split.
KNN_model = KNeighborsClassifier(n_neighbors=1)
KNN_model.fit(scaled_X_train_inst, y_train_inst)
y_pred_test = KNN_model.predict(scaled_X_test_inst)
# classification_report expects (y_true, y_pred); with the arguments swapped
# precision and recall are exchanged and "support" counts predictions.
print(classification_report(y_test_inst, y_pred_test))
precision recall f1-score support
bass 0.99 0.96 0.98 390
brass 0.98 0.97 0.97 129
flute 1.00 0.98 0.99 89
guitar 0.97 0.96 0.96 286
keyboard 0.97 0.99 0.98 355
mallet 0.97 0.99 0.98 98
organ 0.98 1.00 0.99 226
reed 0.98 1.00 0.99 96
string 0.99 0.99 0.99 137
vocal 1.00 1.00 1.00 50
accuracy 0.98 1856
macro avg 0.98 0.98 0.98 1856
weighted avg 0.98 0.98 0.98 1856
from sklearn.ensemble import RandomForestClassifier
# Random forest with library-default settings for instrument classification.
# NOTE(review): no random_state is passed, so results presumably vary from
# run to run -- confirm whether reproducibility matters here.
rand_model = RandomForestClassifier()
rand_model.fit(scaled_X_train_inst,y_train_inst)
RandomForestClassifier()
# Instrument predictions of the random forest on the held-out split.
preds_inst = rand_model.predict(scaled_X_test_inst)
# classification_report expects (y_true, y_pred); with the arguments swapped
# precision and recall are exchanged and "support" counts predictions.
print(classification_report(y_test_inst, preds_inst))
precision recall f1-score support
bass 1.00 1.00 1.00 382
brass 1.00 1.00 1.00 128
flute 1.00 1.00 1.00 87
guitar 0.99 1.00 0.99 281
keyboard 1.00 0.99 1.00 363
mallet 1.00 1.00 1.00 100
organ 1.00 1.00 1.00 231
reed 1.00 1.00 1.00 98
string 1.00 1.00 1.00 136
vocal 1.00 1.00 1.00 50
accuracy 1.00 1856
macro avg 1.00 1.00 1.00 1856
weighted avg 1.00 1.00 1.00 1856
# Confusion matrix of the instrument random forest on the test split; the
# x tick labels are rotated by 90 degrees.
plot_confusion_matrix(rand_model,scaled_X_test_inst,y_test_inst)
plt.xticks(rotation = 90);
Note Identification
# Preview the pitch-classification table (pitch label plus feature columns).
note.head()
| pitch | zero_crossing_rate | zero_crossings | spectrogram | mel_spectrogram | harmonics | perceptual_shock_wave | spectral_centroids | spectral_centroids_delta | spectral_centroids_accelerate | ... | mfcc_accelerate_16 | mfcc17 | mfcc_delta_17 | mfcc_accelerate_17 | mfcc18 | mfcc_delta_18 | mfcc_accelerate_18 | mfcc19 | mfcc_delta_19 | mfcc_accelerate_19 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 22 | 0.210125 | 15742 | -66.413757 | -77.642357 | 0.000006 | -0.000673 | 2671.302747 | 15.874826 | -0.008433 | ... | 0.046337 | -0.739872 | -0.046959 | 0.051725 | -0.290447 | -0.082190 | 0.018868 | 2.694299 | -0.056343 | -0.000569 |
| 1 | 22 | 0.062500 | 459 | -35.409962 | -52.084045 | 0.001413 | -0.001678 | 667.165083 | 136.698995 | 35.325723 | ... | -0.275118 | -0.969568 | 0.028864 | -0.215973 | -2.457192 | -0.140675 | 0.338459 | -0.499463 | -0.884398 | 0.581262 |
| 2 | 23 | 0.008180 | 589 | -25.864546 | -30.828018 | 0.000110 | 0.000114 | 129.129320 | 6.599145 | 0.871162 | ... | 0.013104 | 1.079908 | -0.011627 | 0.000507 | 0.035780 | 0.013179 | 0.005179 | -0.784336 | 0.031024 | -0.011306 |
| 3 | 23 | 0.007953 | 562 | -27.519205 | -33.968998 | 0.000027 | 0.000153 | 120.454945 | 5.414388 | 1.140152 | ... | 0.006131 | 1.711984 | 0.009815 | 0.006658 | 0.741615 | -0.012899 | 0.003196 | -0.122626 | 0.010039 | -0.027255 |
| 4 | 24 | 0.009085 | 646 | -26.927103 | -31.652479 | 0.000025 | 0.000088 | 127.136842 | 5.570173 | 0.700538 | ... | 0.010970 | 1.546056 | 0.016031 | -0.002961 | 0.683596 | -0.000470 | 0.006038 | -0.126844 | -0.017591 | -0.018448 |
5 rows × 88 columns
# Pitch classifier: the label is 'pitch', the features are everything else.
y = note['pitch']
X = note.drop(columns='pitch')
len(y)
12373
# Hold out 15% of the rows for testing, then fit a random forest with
# library-default settings on the (unscaled) pitch features.
X_train_note, X_test_note, y_train_note, y_test_note = train_test_split(
    X,
    y,
    test_size=0.15,
    random_state=101,
)
rand_model2 = RandomForestClassifier()
rand_model2.fit(X_train_note, y_train_note)
RandomForestClassifier()
# Pitch predictions of the default random forest on the held-out split.
preds = rand_model2.predict(X_test_note)
# classification_report expects (y_true, y_pred); with the arguments swapped
# precision and recall are exchanged and "support" counts predictions.
print(classification_report(y_test_note, preds))
precision recall f1-score support
22 0.80 0.67 0.73 12
23 0.71 0.75 0.73 16
24 0.63 0.85 0.72 20
25 0.70 0.88 0.78 16
26 1.00 0.71 0.83 14
27 0.87 1.00 0.93 20
28 0.83 0.75 0.79 20
29 0.86 0.83 0.85 30
30 0.87 0.87 0.87 15
31 0.89 0.85 0.87 20
32 0.87 0.96 0.92 28
33 0.96 0.86 0.91 28
34 0.96 0.92 0.94 26
35 0.76 1.00 0.86 16
36 0.88 0.79 0.83 19
37 0.85 0.79 0.82 29
38 0.88 0.85 0.86 33
39 0.90 0.84 0.87 32
40 0.93 0.87 0.90 30
41 0.90 0.93 0.92 30
42 0.81 0.92 0.86 24
43 1.00 0.87 0.93 30
44 0.88 0.81 0.85 27
45 0.87 0.95 0.91 21
46 0.93 0.90 0.91 29
47 0.94 0.83 0.88 18
48 0.95 1.00 0.97 18
49 0.89 0.92 0.91 26
50 0.91 0.94 0.92 31
51 0.89 0.97 0.93 32
52 0.94 0.97 0.95 32
53 0.93 0.93 0.93 27
54 1.00 0.97 0.98 29
55 0.97 0.97 0.97 33
56 0.97 0.95 0.96 38
57 0.97 0.94 0.95 31
58 0.88 0.96 0.92 23
59 0.96 1.00 0.98 26
60 1.00 0.93 0.97 30
61 0.96 0.93 0.94 27
62 1.00 0.95 0.98 22
63 1.00 1.00 1.00 28
64 1.00 0.96 0.98 27
65 1.00 1.00 1.00 18
66 1.00 1.00 1.00 25
67 1.00 1.00 1.00 22
68 1.00 1.00 1.00 21
69 0.92 1.00 0.96 22
70 1.00 0.96 0.98 23
71 1.00 0.80 0.89 20
72 1.00 1.00 1.00 20
73 1.00 0.93 0.97 15
74 0.95 1.00 0.97 18
75 0.96 0.96 0.96 24
76 0.92 1.00 0.96 23
77 1.00 1.00 1.00 18
78 0.94 0.94 0.94 17
79 1.00 1.00 1.00 30
80 1.00 1.00 1.00 15
81 0.96 0.92 0.94 26
82 0.93 0.93 0.93 15
83 0.88 1.00 0.93 21
84 1.00 0.88 0.93 24
85 0.82 1.00 0.90 14
86 0.94 0.94 0.94 18
87 0.95 0.91 0.93 22
88 0.96 0.96 0.96 24
89 0.95 1.00 0.98 20
90 0.82 0.90 0.86 10
91 0.87 0.87 0.87 15
92 0.93 1.00 0.97 14
93 0.88 0.93 0.90 15
94 0.83 1.00 0.91 15
95 0.92 1.00 0.96 11
96 1.00 0.93 0.97 15
97 0.89 0.94 0.91 17
98 1.00 1.00 1.00 15
99 1.00 1.00 1.00 13
100 0.82 0.90 0.86 10
101 0.91 0.77 0.83 13
102 0.94 0.58 0.71 26
103 1.00 1.00 1.00 11
104 1.00 1.00 1.00 14
105 0.86 1.00 0.92 12
106 0.82 0.93 0.87 15
107 0.88 0.88 0.88 17
accuracy 0.92 1856
macro avg 0.92 0.92 0.92 1856
weighted avg 0.93 0.92 0.92 1856
from sklearn.model_selection import GridSearchCV
# Grid search over forest size and tree depth for the pitch classifier.
param_grid = {
    "n_estimators": [100, 150, 200, 250],
    'max_depth': [6, 10, 14, 20, 25],
}
rand_model3 = RandomForestClassifier()
grid = GridSearchCV(rand_model3, param_grid)
grid.fit(X_train_note, y_train_note)
GridSearchCV(estimator=RandomForestClassifier(),
param_grid={'max_depth': [6, 10, 14, 20, 25],
'n_estimators': [100, 150, 200, 250]})
# Hyper-parameter combination selected by the grid search.
grid.best_params_
{'max_depth': 25, 'n_estimators': 150}
# Pitch predictions of the grid-searched random forest on the held-out split.
preds = grid.predict(X_test_note)
# classification_report expects (y_true, y_pred); with the arguments swapped
# precision and recall are exchanged and "support" counts predictions.
print(classification_report(y_test_note, preds))
precision recall f1-score support
22 0.90 0.75 0.82 12
23 0.71 0.86 0.77 14
24 0.59 0.84 0.70 19
25 0.80 0.84 0.82 19
26 1.00 0.77 0.87 13
27 0.91 0.95 0.93 22
28 0.89 0.80 0.84 20
29 0.79 0.88 0.84 26
30 0.87 0.76 0.81 17
31 0.95 0.90 0.92 20
32 0.94 0.97 0.95 30
33 1.00 0.89 0.94 28
34 0.88 0.92 0.90 24
35 0.76 0.89 0.82 18
36 0.94 0.80 0.86 20
37 0.96 0.87 0.91 30
38 0.88 0.90 0.89 31
39 0.87 0.87 0.87 30
40 0.93 0.96 0.95 27
41 0.94 0.91 0.92 32
42 0.81 1.00 0.90 22
43 1.00 0.90 0.95 29
44 0.88 0.88 0.88 25
45 0.87 0.91 0.89 22
46 0.89 0.86 0.88 29
47 0.94 0.79 0.86 19
48 0.95 1.00 0.97 18
49 0.89 0.92 0.91 26
50 0.91 0.97 0.94 30
51 0.94 0.94 0.94 35
52 0.97 0.97 0.97 33
53 0.89 0.89 0.89 27
54 1.00 0.97 0.98 29
55 0.97 0.97 0.97 33
56 0.95 0.95 0.95 37
57 1.00 0.94 0.97 32
58 0.88 0.96 0.92 23
59 1.00 0.96 0.98 28
60 1.00 0.90 0.95 31
61 0.96 0.96 0.96 26
62 1.00 0.91 0.95 23
63 1.00 1.00 1.00 28
64 1.00 0.96 0.98 27
65 1.00 1.00 1.00 18
66 1.00 1.00 1.00 25
67 1.00 1.00 1.00 22
68 0.95 1.00 0.98 20
69 0.92 1.00 0.96 22
70 1.00 0.92 0.96 24
71 1.00 0.84 0.91 19
72 1.00 1.00 1.00 20
73 1.00 1.00 1.00 14
74 1.00 1.00 1.00 19
75 0.96 0.96 0.96 24
76 0.92 1.00 0.96 23
77 1.00 1.00 1.00 18
78 0.94 1.00 0.97 16
79 1.00 1.00 1.00 30
80 1.00 1.00 1.00 15
81 0.96 0.96 0.96 25
82 1.00 1.00 1.00 15
83 0.88 1.00 0.93 21
84 1.00 0.91 0.95 23
85 0.88 1.00 0.94 15
86 0.94 0.89 0.92 19
87 0.95 0.95 0.95 21
88 0.96 1.00 0.98 23
89 0.95 1.00 0.98 20
90 0.82 0.90 0.86 10
91 0.87 0.93 0.90 14
92 1.00 1.00 1.00 15
93 0.88 0.88 0.88 16
94 0.78 1.00 0.88 14
95 0.92 1.00 0.96 11
96 1.00 0.93 0.97 15
97 0.89 1.00 0.94 16
98 1.00 1.00 1.00 15
99 1.00 1.00 1.00 13
100 0.82 0.82 0.82 11
101 0.91 0.77 0.83 13
102 0.94 0.60 0.73 25
103 1.00 0.73 0.85 15
104 1.00 1.00 1.00 14
105 0.86 0.92 0.89 13
106 0.76 0.93 0.84 14
107 0.88 0.88 0.88 17
accuracy 0.93 1856
macro avg 0.93 0.93 0.92 1856
weighted avg 0.93 0.93 0.93 1856
# Attach the predicted pitch and predicted instrument to the test features.
# `assign` returns a new frame, so the columns are not written into a slice
# produced by train_test_split (avoids pandas' chained-assignment warning).
# NOTE(review): `preds_inst` comes from the instrument split; the rows are
# presumably aligned because both splits use the same test_size and
# random_state on frames with the same rows -- confirm.
X_test_note = X_test_note.assign(Note=preds, Instrument=preds_inst)
X_test_note.head()
| zero_crossing_rate | zero_crossings | spectrogram | mel_spectrogram | harmonics | perceptual_shock_wave | spectral_centroids | spectral_centroids_delta | spectral_centroids_accelerate | chroma1 | ... | mfcc_delta_17 | mfcc_accelerate_17 | mfcc18 | mfcc_delta_18 | mfcc_accelerate_18 | mfcc19 | mfcc_delta_19 | mfcc_accelerate_19 | Note | Instrument | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 7829 | 0.008167 | 599 | -17.000275 | -24.065882 | -2.391993e-05 | 0.000028 | 125.909052 | 3.842943 | 0.748480 | 0.122843 | ... | 0.074348 | -0.021655 | -2.285110 | 0.061437 | -0.010873 | 5.484561 | 0.001235 | -0.002155 | 30 | keyboard |
| 2043 | 0.018063 | 1385 | -65.334007 | -78.159966 | 3.126499e-05 | -0.000030 | 252.379844 | 7.212287 | 1.285274 | 0.009431 | ... | -0.035811 | 0.022589 | -11.201360 | -0.012952 | 0.028557 | -11.548903 | 0.006247 | 0.025766 | 43 | bass |
| 8896 | 0.013378 | 1185 | -58.515682 | -79.649185 | 3.395107e-05 | -0.000033 | 205.533172 | 7.821560 | 1.213353 | 0.501567 | ... | 0.075102 | 0.020115 | -18.010260 | 0.071818 | 0.034119 | -16.802776 | 0.003094 | 0.043800 | 46 | mallet |
| 1830 | 0.004121 | 319 | -14.570759 | -44.845161 | -1.256410e-02 | 0.000045 | 84.263868 | -2.796468 | 1.489708 | 0.724999 | ... | 0.027695 | 0.002246 | 9.528914 | 0.063975 | 0.017251 | 10.114882 | -0.099881 | -0.009263 | 77 | bass |
| 12077 | 0.018680 | 1650 | -36.952751 | -68.855492 | 5.404165e-07 | 0.000111 | 444.422995 | 5.354834 | 0.846387 | 0.067997 | ... | 0.005530 | -0.016946 | -5.744551 | 0.008136 | -0.012692 | -4.648269 | 0.050792 | -0.009209 | 39 | string |
5 rows × 89 columns
# Rows predicted as 'keyboard', narrowed to predicted notes strictly between
# 40 and 80.
is_keyboard = X_test_note['Instrument'] == 'keyboard'
keyboard = X_test_note[is_keyboard]

note_above_40 = keyboard['Note'] > 40
note_below_80 = keyboard['Note'] < 80
staff = keyboard[note_below_80 & note_above_40]
len(staff)
193
# Number the selected rows 1..N.  N is taken from the data, because the
# number of rows changes whenever the predictions change.
staff.index = range(1, len(staff) + 1)

# Scatter the predicted notes (first 50 positions shown) over ten horizontal
# reference lines.
plt.figure(figsize=(16, 8), dpi=200)
plt.scatter(staff.index, staff['Note'])
plt.xlim(0, 50)

# Two groups of five lines -- presumably the treble (64..77) and bass
# (43..57) staff lines expressed as MIDI note numbers.
staff_line_pitches = (64, 67, 71, 74, 77, 43, 47, 50, 53, 57)
for line_pitch in staff_line_pitches:
    plt.axhline(y=line_pitch, color='black', linestyle='-')